1 /*
2 Copyright: Marcelo S. N. Mancini (Hipreme|MrcSnm), 2018 - 2021
3 License:   [https://creativecommons.org/licenses/by/4.0/|CC BY-4.0 License].
4 Authors: Marcelo S. N. Mancini
5 
6 	Copyright Marcelo S. N. Mancini 2018 - 2021.
7 Distributed under the CC BY-4.0 License.
8    (See accompanying file LICENSE.txt or copy at
9 	https://creativecommons.org/licenses/by/4.0/
10 */
11 module hip.util..string;
12 public import hip.util.conv:to;
13 public import hip.util.to_string_range;
14 
//On these targets the `UseDRuntimeDecoder` version identifier is defined.
//`toUTF32` in this module checks it to decode with a plain `foreach(dchar ...)` loop
//instead of importing `std.utf`.
version(WebAssembly) version = UseDRuntimeDecoder;
version(CustomRuntimeTest) version = UseDRuntimeDecoder;
version(PSVita) version = UseDRuntimeDecoder;
18 
/** 
 *  Reference-counted, @nogc string that is also usable as an OutputRange (see `put`).
 *
 *  The character buffer and the reference counter are allocated with `malloc`. Copies share
 *  the same buffer (the postblit only increments the counter); a copy detaches itself into
 *  a private buffer through `updateBorrowed` before it is modified.
 *  A String that was never initialized (`countPtr is null`) may also reference a string
 *  literal directly; such memory is never freed nor written to.
 */
struct String
{
    @nogc:
    import core.stdc.string;
    import core.stdc.stdlib;
    import core.int128;
    ///Current content. When `countPtr` is not null, it points into the malloc'ed buffer.
    char[] chars;
    ///Size in bytes of the owned buffer
    private size_t _capacity;
    ///Shared reference counter. Null while this String owns no buffer.
    private int* countPtr;
    ///Number of chars currently stored
    size_t length() const {return chars.length;}

    ///Copies share the buffer, so only the reference count changes.
    this(this)
    {
        if(countPtr !is null)
            *countPtr = *countPtr + 1;
    }

    /**
    *   Allocates a fresh zero-filled buffer of `length` bytes (128 when `length` is 0) and a
    *   new reference counter set to 1. The content becomes empty.
    *   The previous buffer and counter are NOT released here.
    */
    private void initialize(size_t length)
    {
        if(length == 0)
            length = 128;
        this.chars = (cast(char*)malloc(length))[0..0];
        this.countPtr = cast(int*)malloc(int.sizeof);
        this._capacity = length;
        this.chars.ptr[0.._capacity] = '\0';
        *countPtr = 1;
    }

    ///Capacity to request when at least `needed` bytes are required (about 1.5x, never 0).
    private static size_t grownCapacity(size_t needed)
    {
        return needed < 2 ? needed + 1 : needed + needed / 2;
    }

    ///Creates a String holding a copy of `str`
    static auto opCall(string str)
    {
        String s;
        s.initialize(str.length);
        s.chars = s.chars.ptr[0..str.length];
        s.chars[] = str[];
        return s;
    }
    ///Creates a String from a null terminated C string
    static auto opCall(const(char)* str){return opCall(str[0..strlen(str)]);}
    ///Returns the received String (sharing its buffer)
    static auto opCall(String str){return str;}

    ///Types that can be appended directly with `~=`
    private enum isAppendable(T) = is(T == String) || is(T == string) || is(T == immutable(char)*) || is(T == char);
    
    /**
    *   Creates a String by appending every argument. Appendable types are added with `~=`,
    *   other types go through `toStringRange` or their `toString` member.
    */
    static auto opCall(Args...)(Args args)
    {
        import hip.util.conv:toStringRange;
        String s;
        s.initialize(128);
        static foreach(a; args)
        {
            static if(isAppendable!(typeof(a)) )
                s~= a;
            else static if(is(typeof(a) == struct) || __traits(compiles, toStringRange(s, a)))
            {
                toStringRange(s, a);
            }
            else static if(__traits(hasMember, a, "toString"))
                s~= a.toString;
            else static assert(false, "No conversion found");
        }
        return s;
    }

    alias _opApplyFn = int delegate(char c) @nogc;
    /**
    *   foreach support. Iterates over every char until the delegate returns a non zero value
    *   (which is what a `break` inside the foreach body produces).
    */
    int opApply(scope _opApplyFn dg)
    {
        int result = 0;
        //Keep going only while the body did NOT ask to stop.
        for(size_t i = 0; i < length && !result; i++)
            result = dg(chars[i]);
        return result;
    }

    /**
    *   Guarantees that this String exclusively owns its buffer before a modification.
    *
    *   If it is not initialized or if the buffer is shared with other Strings, a new buffer
    *   able to hold the current content plus `length` more bytes is allocated and the
    *   current content (including a referenced literal) is copied into it.
    *   Returns: true if a new buffer was allocated, false if the buffer was already owned.
    */
    bool updateBorrowed(size_t length)
    {
        if(countPtr !is null && *countPtr == 1)
            return false;
        char[] oldChars = chars;
        //Shared: drop our reference, the remaining owners keep the old buffer alive.
        if(countPtr !is null)
            *countPtr = *countPtr - 1;
        initialize(length + oldChars.length);
        chars = chars.ptr[0..oldChars.length];
        chars[] = oldChars[];
        return true;
    }

    ///Appends a String, string, C string, char or anything convertible through `String(value)`
    auto ref opOpAssign(string op, T)(T value)
    if(op == "~")
    {
        String temp;
        char[] chs;
        static if(is(T == String))
            chs = value.chars;
        else static if (is(T == string) || is(T == char[]))
            chs = cast(char[])value;
        else static if(is(T == immutable(char)*))
            chs = value[0..strlen(value)];
        else static if(is(T == char))
        {
            char[1] _chContainer;
            _chContainer[0] = value;
            chs = _chContainer;
        }
        else
        {
            temp = String(value);
            chs = temp.chars;
        }
        if(!updateBorrowed(chs.length) && chs.length + this.length >= this._capacity) //New size is greater than capacity
            resize(grownCapacity(chs.length + this.length));
        if(chs.length != 0)
            memcpy(chars.ptr+length, chs.ptr, chs.length);
        chars = chars.ptr[0..chars.length+chs.length];
        return this;
    }

    /**
    *   Replaces the content with `value`.
    *   A String that owns no buffer only references `value` (no allocation for literals);
    *   otherwise the chars are copied into an exclusively owned buffer.
    */
    auto ref opAssign(string value)
    {
        if(countPtr is null)
            chars = cast(char[])value; //Don't allocate memory for the string literal.
        else
        {
            if(*countPtr != 1)
            {
                //Shared buffer: leave it to the other owners and start a private one.
                *countPtr = *countPtr - 1;
                initialize(value.length);
            }
            else if(value.length > _capacity)
                resize(value.length);
            //The length must follow the assigned value, not the previous content.
            chars = chars.ptr[0..value.length];
            chars[] = value[];
        }
        return this;
    }

    ///Same as assigning a string, reading `value` up to its null terminator
    auto ref opAssign(immutable(char)* value)
    {
        opAssign(value[0..strlen(value)]);
        return this;
    }

    ///View of the content typed as string. It aliases the internal buffer.
    string opCast() const
    {
        return cast(string)chars[0..length];
    }
    ///View of the content typed as string. It aliases the internal buffer.
    string toString() const {return cast(string)chars;}

    ///Reallocates the owned buffer to `newSize` bytes, keeping the current length
    pragma(inline, true) private void resize(size_t newSize)
    {
        chars = (cast(char*)realloc(chars.ptr, newSize))[0..chars.length];
        _capacity = newSize;
    }
    ///Make this struct OutputRange compatible
    void put(char c)
    {
        //Never realloc/write memory that is not exclusively owned.
        if(!updateBorrowed(1) && this.length + 1 >= this._capacity)
            resize(grownCapacity(this.length + 1));
        chars.ptr[length] = c;
        chars = chars.ptr[0..length+1];
    }
    ///Comparison against `null` (true when empty), string or String
    bool opEquals(R)(const R other) const
    {
        static if(is(R == typeof(null)))
            return chars == null;
        else static if(is(R == string))
            return toString == other;
        else static if(is(R == String))
            return toString == other.toString;
        else static assert(false, "Invalid comparison between String and "~R.stringof);
    }
    
    /**
    *   This function serves to allocate before put. This will make less allocations occur while iterating
    * this struct as an OutputRange.
    */
    void preAllocate(uint howMuch)
    {
        if(!updateBorrowed(howMuch) && length + howMuch > _capacity)
            resize(_capacity + howMuch);
    }
    void preAllocate(ulong howMuch){preAllocate(cast(uint)howMuch);}

    ///Reference to the char at `index`
    ref auto opIndex(size_t index)
    {
        assert(index < length, "Index out of bounds");
        return chars[index];
    }

    ///Releases one reference; the last owner frees the buffer and the counter.
    ~this()
    {
        if(countPtr != null)
        {
            *countPtr = *countPtr - 1;
            assert(*countPtr >= 0);
            if(*countPtr == 0)
            {
                //`chars != null` would compare lengths, leaking empty-but-allocated buffers.
                if(chars.ptr !is null)
                    free(chars.ptr);
                free(countPtr);
            }
            countPtr = null;
            chars = null;
        }
    }

}
233 
/**
*   Accumulates string pieces and concatenates them lazily.
*
*   Appended strings are only stored in `strings`; they are copied into the internal
*   `builtString` buffer when `toString` is called (or when the pending list already holds
*   0x10000 entries and one more is appended).
*   NOTE: the string returned by `toString` is a cast view of the internal buffer, which is
*   written again by later `opAssign`/`toString` calls.
*/
struct StringBuilder
{
    ///Buffer holding everything that was already concatenated
    private char[] builtString;
    ///Number of valid chars in `builtString`
    private uint builtLength;
    ///Pieces appended since the last concatenation (only the first `stringsPtr` are valid)
    string[] strings;
    private uint stringsPtr = 0;
    
    ///Queues `value` to be concatenated on the next `toString` call
    void append(T)(T value)
    {
        if(stringsPtr == strings.length)
        {
            if(strings.length == 0x10000) //65K (This will guarantee a reasonable amount of allocations)
                toString();
            else
            {
                //128 is a reasonable start, this way, no really small operation should matter on performance
                strings.length = strings.length == 0 ? 128 : strings.length * 2;
            }
        }
        strings[stringsPtr++] = value;
    }
    ///Concatenates every pending piece after the already built content and returns the result
    string toString()
    {
        import core.stdc.string:memcpy;
        if(stringsPtr == 0) return cast(string)builtString[0..builtLength];
        uint count = builtLength;
        uint i = builtLength;
        foreach(s;strings[0..stringsPtr])
            count+= s.length;
        builtString.length = count;
        
        foreach(s; strings[0..stringsPtr])
        {
            memcpy(builtString.ptr+i, s.ptr, s.length);
            i+= s.length;
        }
        builtLength = count;
        stringsPtr = 0;
        return cast(string)builtString[0..builtLength];
    }
    ///Replaces the whole content (built and pending) with a copy of `value`
    auto ref opAssign(T)(T value) if(is(T == string))
    {
        builtString.length = value.length;
        //The source of the copy is the received `value`.
        builtString[] = value[];
        stringsPtr = 0;
        builtLength = cast(typeof(builtLength))value.length;

        return this;
    }
    ///`~=` appends a single value or, for arrays that are not `string`, each of its elements
    auto ref opOpAssign(string op, T)(T value) if(op == "~")
    {
        import std.traits:isArray;
        static if(isArray!T && !is(T == string))
            foreach(v; value) append(v);
        else
            append(value);
        return this;
    }
    ///Concatenates the pending pieces and returns the char at `index`
    ref auto opIndex(size_t index){return toString()[index];}
    ///Length of the already built content. Pending pieces are not counted until `toString` runs.
    uint length(){return builtLength;}
    ~this(){strings.length = 0;}

    ///Interface for OutputRange
    alias put = append;
}
300 
301 
/**
*   Converts an UTF-8 string to UTF-32.
*   When `UseDRuntimeDecoder` is defined, the decoding is done by iterating the string
*   as `dchar`; otherwise `std.utf.toUTF32` does the work.
*/
pure dstring toUTF32(string encoded)
{
    version(UseDRuntimeDecoder)
    {
        dstring result;
        foreach(dchar codePoint; encoded)
            result~= codePoint;
        return result;
    }
    else
    {
        static import std.utf;
        dstring result = std.utf.toUTF32(encoded);
        return result;
    }
}
316 
/**
*   Returns a copy of `str` where every occurrence of the char `what` is replaced by
*   `replaceWith`. With the default (empty) `replaceWith` the char is simply removed.
*
*   The result is accumulated in a `TString`, so the function is not limited to `string`.
*/
pure TString replaceAll(TChar, TString = TChar[])(TString str, TChar what, TString replaceWith = "")
{
    TString ret;
    foreach(i; 0..str.length)
    {
        if(str[i] != what) ret~= str[i];
        else if(replaceWith.length != 0) ret~= replaceWith;
    }
    return ret;
}
327 
/**
*   Returns a copy of `str` where every occurrence of `what` is replaced by `replaceWith`.
*   Occurrences are located with `indexOf`, resuming the search right after each match.
*/
pure TString replaceAll(TString)(TString str, TString what, TString replaceWith = "")
{
    char[] result;
    //First index of `str` that was not copied to the result yet
    int cursor = 0;

    for(int found = indexOf(str, what, 0); found != -1; found = indexOf(str, what, cursor))
    {
        //Untouched text that precedes the match, followed by the replacement
        result~= str[cursor..found];
        result~= replaceWith;
        //Skip what
        cursor = found + cast(int)what.length;
    }

    //Remaining text after the last match
    result~= str[cursor..$];
    return cast(TString)result;
}
358 
/**
*   Finds the first occurrence of `toFind` inside `str`, searching from `startIndex`.
*
*   Every candidate position is compared from the beginning of `toFind`, so matches that
*   begin inside a failed partial match are found too (e.g. "ab" inside "aab").
*   A negative `startIndex` is treated as 0.
*   Returns: the index where the match begins, or -1 if there is no match or `toFind` is empty.
*/
pure int indexOf (TString)(inout TString str,inout TString toFind, int startIndex = 0) nothrow @nogc @safe
{
    if(toFind.length == 0 || toFind.length > str.length)
        return -1;
    if(startIndex < 0)
        startIndex = 0;

    //Last position where a full match still fits
    immutable size_t lastCandidate = str.length - toFind.length;
    for(size_t candidate = startIndex; candidate <= lastCandidate; candidate++)
    {
        size_t matched = 0;
        while(matched < toFind.length && str[candidate + matched] == toFind[matched])
            matched++;
        if(matched == toFind.length)
            return cast(int)candidate;
    }
    return -1;
}
378 
/**
*   Returns whether `str` begins with `withWhat`. An empty `withWhat` always matches.
*/
pure bool startsWith(TString)(inout TString str, inout TString withWhat) nothrow @nogc @safe
{
    //A prefix can't be longer than the string itself
    if(str.length < withWhat.length)
        return false;
    foreach(i; 0..withWhat.length)
    {
        if(str[i] != withWhat[i])
            return false;
    }
    return true;
}
388 
/**
*   Checks if `str` starts with `afterWhat` and, when it does, returns the remaining part
*   of `str` (everything after `afterWhat`). Returns null otherwise.
*/
pure string after(TString)(TString str, immutable TString afterWhat) nothrow @nogc @safe
{
    if(str.startsWith(afterWhat))
        return str[afterWhat.length..$];
    return null;
}
399 
/**
*   Searches `afterWhat` inside `str` (from `startIndex`) using `indexOf` and returns
*   everything that comes after the first match, or null when there is no match.
*/
pure inout(TString) findAfter(TString)(inout TString str, inout TString afterWhat, int startIndex = 0) nothrow @nogc @safe
{
    immutable int matchStart = str.indexOf(afterWhat, startIndex);
    if(matchStart != -1)
        return str[matchStart+afterWhat.length..$];
    return null;
}
407 
/**
*   Returns the content that is between `left` and `right`:
```d
string test = `string containing a "thing"`;
writeln(test.between(`"`, `"`)); //thing
```
*   The search for `left` begins at `start` and the search for `right` begins right after
*   the whole `left` delimiter, so delimiters longer than one char are handled.
*   Returns null if any of the delimiters is not found.
*/
pure inout(TString) between(TString)(inout TString str, inout TString left, inout TString right, int start = 0) nothrow @nogc @safe
{
    int leftIndex = str.indexOf(left, start);
    if(leftIndex == -1) return null;
    //The content starts after the complete left delimiter, not just after its first char
    int contentStart = leftIndex + cast(int)left.length;
    int rightIndex = str.indexOf(right, contentStart);
    if(rightIndex == -1) return null;

    return str[contentStart..rightIndex];
}
424 
/**
*   Finds the first occurrence of the char `ch` inside `str`, searching from `startIndex`.
*
*   The string is scanned directly, so no temporary array is needed and any char type works.
*   A negative `startIndex` is treated as 0.
*   Returns: the index of the char, or -1 when it is not found.
*/
pure int indexOf(TChar)(inout TChar[] str, inout TChar ch, int startIndex = 0) nothrow @nogc @trusted
{
    if(startIndex < 0)
        startIndex = 0;
    for(size_t i = startIndex; i < str.length; i++)
    {
        if(str[i] == ch)
            return cast(int)i;
    }
    return -1;
}
430 
431 
/**
*   Returns `str` concatenated with itself `repeatQuant` times.
*   The result is empty when `repeatQuant` is 0.
*/
TString repeat(TString)(TString str, size_t repeatQuant)
{
    TString repeated;
    foreach(_; 0..repeatQuant)
        repeated~= str;
    return repeated;
}
439 
/**
*   Counts how many times `countWhat` is found inside `str`.
*   After each match the search resumes right after the matched text.
*/
pure int count(TString)(inout TString str, inout TString countWhat) nothrow @nogc @safe
{
    int occurrences = 0;

    //Navigates using indexOf
    for(int at = str.indexOf(countWhat, 0); at != -1; at = str.indexOf(countWhat, at))
    {
        at+= countWhat.length;
        occurrences++;
    }
    return occurrences;
}
453 
///`countUntil` is an alternative name for `indexOf`.
alias countUntil = indexOf;
455 
/**
*   Finds the last occurrence of `toFind` inside `str`.
*
*   `startIndex` is the index of the last char that may take part in the match; -1 (the
*   default) or any value past the end means "the end of the string".
*   Every candidate position is compared against the whole `toFind`, so matches that
*   overlap a failed partial match are found too (e.g. "ab" inside "abb").
*   Returns: the index where the match begins, or -1 if there is no match or `toFind` is empty.
*/
int lastIndexOf(TString)(inout TString str,inout TString toFind, int startIndex = -1) pure nothrow @nogc @safe
{
    immutable int toFindLength = cast(int)toFind.length;
    if(toFindLength == 0) return -1; //Empty string case 

    immutable int strLength = cast(int)str.length;
    if(startIndex == -1 || startIndex >= strLength) startIndex = strLength - 1;

    //The match must end at or before startIndex
    for(int candidate = startIndex - toFindLength + 1; candidate >= 0; candidate--)
    {
        int matched = 0;
        while(matched < toFindLength && str[candidate + matched] == toFind[matched])
            matched++;
        if(matched == toFindLength)
            return candidate;
    }
    return -1;
}
/**
*   Finds the last occurrence of the char `ch` inside `str`.
*   The char is wrapped in a one element array and the search is delegated to the
*   `lastIndexOf` overload that receives a string.
*/
int lastIndexOf(TChar)(TChar[] str, TChar ch, int startIndex = -1) pure nothrow @nogc @trusted
{
    TChar[1] singleChar = [ch];
    immutable int found = lastIndexOf(str, cast(TChar[])singleChar, startIndex);
    return found;
}
483 
/**
*   Converts `s` to `T` using `to`. Returns `defaultValue` when `s` is empty or when the
*   conversion throws an Exception.
*/
T toDefault(T)(string s, T defaultValue = T.init)
{
    if(s.length != 0)
    {
        try
        {
            return to!(T)(s);
        }
        catch(Exception e)
        {
            //Conversion failed: fall through to the default value
        }
    }
    return defaultValue;
}
493 
/**
*   Creates a string view over a null terminated C string. No copy is made: the result
*   references the same memory as `cstr`.
*   Returns: null when `cstr` is null or points to an empty C string.
*/
string fromStringz(const char* cstr) pure nothrow @nogc
{
    import core.stdc.string:strlen;
    //strlen must never receive a null pointer
    if(cstr is null)
        return null;
    size_t len = strlen(cstr);
    return (len) ? cast(string)cstr[0..len] : null;
}
500 
/**
*   Returns a pointer to a newly allocated, null terminated copy of `str`.
*/
const(char)* toStringz(string str) pure nothrow
{
    char[] terminated = new char[](str.length + 1);
    terminated[0..str.length] = str[];
    terminated[$-1] = '\0';
    return terminated.ptr;
}
///Converts an ASCII uppercase letter ('A'..'Z') to lowercase. Any other char is returned as is.
pragma(inline, true) char toLowerCase(char c) pure nothrow @safe @nogc 
{
    enum caseOffset = 'a' - 'A';
    return (c >= 'A' && c <= 'Z') ? cast(char)(c + caseOffset) : c;
}
511 
///Returns a new string where every ASCII uppercase letter of `str` is converted to lowercase.
string toLowerCase(string str)
{
    char[] lowered = new char[](str.length);
    foreach(i, char ch; str)
        lowered[i] = ch.toLowerCase;
    return cast(string)lowered;
}
519 
///Converts an ASCII lowercase letter ('a'..'z') to uppercase. Any other char is returned as is.
pragma(inline, true) char toUpper(char c) pure nothrow @nogc @safe
{
    enum caseOffset = 'a' - 'A';
    return (c >= 'a' && c <= 'z') ? cast(char)(c - caseOffset) : c;
}
526 
///Returns a new string where every ASCII lowercase letter of `str` is converted to uppercase.
string toUpper(string str) pure nothrow @safe
{
    char[] uppered = new char[](str.length);
    foreach(i, char ch; str)
        uppered[i] = ch.toUpper;
    return uppered;
}
534 
/**
*   Splits `str` at every occurrence of the char `separator`.
*   The char is wrapped in a one element array and the work is delegated to the `split`
*   overload that receives a string separator.
*/
TChar[][] split(TChar)(TChar[] str, TChar separator) pure nothrow
{
    TChar[1] singleSeparator = [separator];
    auto pieces = split(str, cast(TChar[])singleSeparator);
    return pieces;
}
540 
/**
*   Splits `str` at every occurrence of `separator` (located with `indexOf`).
*   The text after the last separator is always added as the last element, even when empty.
*   When the separator is not found, the result holds `str` as its single element.
*/
TString[] split(TString)(TString str, TString separator) pure nothrow @safe
{
    TString[] pieces;
    //Start of the piece currently being delimited
    int pieceStart = 0;

    for(int hit = str.indexOf(separator, 0); hit != -1; hit = str.indexOf(separator, pieceStart))
    {
        pieces~= str[pieceStart..hit];
        pieceStart = hit + cast(int)separator.length;
    }

    //Whatever remains after the last separator
    pieces~= str[pieceStart..$];
    return pieces;
}
560 
/**
*   Returns a range-like struct that walks over the pieces of `str` delimited by `separator`,
*   locating the separators with `indexOf` as the range advances.
*
*   NOTE(review): `front` advances the range whenever the current piece is empty, so empty
*   pieces look like they are skipped instead of returned — confirm that this is intended.
*   NOTE(review): when the separator is never found, `frontStr` is never assigned (the
*   `lastFound != 0` branch is not taken), which differs from `split` — confirm.
*/
auto splitRange(TString, TStrSep)(TString str, TStrSep separator) pure nothrow @safe @nogc
{
    struct SplitRange
    {
        //String being split and the separator to look for
        TString strToSplit;
        TStrSep sep;
        //Piece returned by front()
        TString frontStr;
        //lastFound: where the next piece begins; index: where the next search begins.
        //Both become -1 once there is nothing else to find.
        int lastFound, index;

        //Finished when there is no current piece and both cursors are -1
        bool empty(){return frontStr == null && index == -1 && lastFound == -1;}
        TString front()
        {
            //No (or an empty) current piece: advance before returning
            if(frontStr == "") popFront();
            return frontStr;
        }
        void popFront()
        {
            //Already exhausted: only clear the current piece so empty() becomes true
            if(index == -1 && lastFound == -1)
            {
                frontStr = null;
                return;
            }
            index = indexOf(cast(TString)strToSplit, cast(TString)sep, index);
            //When finding, take the string[lastFound..index]
            if(index != -1)
            {
                frontStr = strToSplit[lastFound..index];
                //Both cursors move to right after the separator
                lastFound = index+= sep.length;
            }
            //If index not found and there was a last, take the string[lastFound..$]
            else if(lastFound != 0)
            {
                frontStr = strToSplit[lastFound..$];
                lastFound = -1;
            }
            //Just say there is no string
            else
                lastFound = -1;
        }
    }

    //frontStr, lastFound and index keep their default values (null, 0, 0)
    return SplitRange(str, separator);
}
604 
605 
/**
*   Checks if `str` is a decimal number: an optional leading '-', digits and at most one '.'.
*
*   At least one digit is required, so "", "-", "." and "-." are not numbers.
*/
bool isNumber(TString)(in TString str) pure nothrow @nogc
{
    bool isFirst = true;
    bool hasDecimalSeparator = false;
    bool hasDigit = false;
    foreach(c; str)
    {
        //Check for negative
        if(isFirst)
        {
            isFirst = false;
            if(c == '-')
                continue;
        }
        //Can only check for '.' once.
        if(!hasDecimalSeparator && c == '.')
            hasDecimalSeparator = true;
        else if(c < '0' || c > '9')
            return false;
        else
            hasDigit = true;
    }
    //Signs and separators alone don't make a number
    return hasDigit;
}
630 
/**
Returns the trailing part of the string made only of chars accepted by `isNumeric`.
Used when you have numbered items such as frames: walk_01, walk_02, etc
```d
"test123".getNumericEnding == "123"
"123abc".getNumericEnding == ""
"123".getNumericEnding == "123"
```
*/
string getNumericEnding(string s)
{
    if(!s)
        return "";
    //Walk backwards until a non numeric char (or the beginning) is reached
    size_t endingStart = s.length;
    while(endingStart > 0 && isNumeric(s[endingStart - 1]))
        endingStart--;
    return s[endingStart..$];
}
653 
654 
///True when `c` is an ASCII uppercase letter ('A'..'Z')
pragma(inline, true) bool isUpperCase(TChar)(TChar c) @nogc nothrow pure @safe
{
    return !(c < 'A' || c > 'Z');
}
///True when `c` is an ASCII lowercase letter ('a'..'z')
pragma(inline, true) bool isLowercase(TChar)(TChar c) @nogc nothrow pure @safe
{
    return !(c < 'a' || c > 'z');
}
663 
///True when `c` is an ASCII letter, lowercase or uppercase
pragma(inline, true) bool isAlpha(TChar)(TChar c) @nogc nothrow pure @safe
{
    if(c >= 'a' && c <= 'z')
        return true;
    return c >= 'A' && c <= 'Z';
}
668 
///True when `c` is a line feed ('\n') or a carriage return ('\r')
pragma(inline, true) bool isEndOfLine(TChar)(TChar c) @nogc nothrow pure @safe
{
    return !(c != '\n' && c != '\r');
}
673 
///True when `c` is a decimal digit ('0'..'9') or the minus sign ('-')
pragma(inline, true) bool isNumeric(TChar)(TChar c) @nogc nothrow pure @safe
{
    if(c == '-')
        return true;
    return c >= '0' && c <= '9';
}
///True when `c` is a space, a tab or an end of line char ('\n' or '\r')
pragma(inline, true) bool isWhitespace(TChar)(TChar c) @nogc nothrow pure @safe
{
    if(c == ' ' || c == '\t')
        return true;
    //End of line chars
    return c == '\n' || c == '\r';
}
682 
/**
*   Splits a path at every '/' or '\\'.
*   Consecutive, leading or trailing separators produce empty elements; the result always
*   has at least one element.
*/
TString[] pathSplliter(TString)(TString str)
{
    TString[] parts;
    TString segment;

    foreach(ch; str)
    {
        if(ch != '/' && ch != '\\')
            segment~= ch;
        else
        {
            //Separator: close the current segment and begin a new one
            parts~= segment;
            segment = null;
        }
    }
    parts~= segment;
    return parts;
}
699 
700 
/**
*   Returns the slice of `str` without the whitespace (see `isWhitespace`) found at its
*   beginning and at its end. No copy is made.
*
*   A string made only of whitespace results in an empty slice.
*/
TString trim(TString)(TString str) pure nothrow @safe @nogc
{
    size_t start = 0;
    //Exclusive end: it can meet `start` without underflowing
    size_t end = str.length;
    while(start < end && str[start].isWhitespace)
        start++;
   
    while(end > start && str[end - 1].isWhitespace)
        end--;
    
    return str[start..end];
}
716 
/**
*   Concatenates every element of `args`, placing `separator` between consecutive elements.
*   Returns an empty string when `args` is empty.
*/
TString join(TString)(TString[] args, TString separator = "")
{
	if(args.length == 0) return "";
	TString joined = args[0];
	foreach(piece; args[1..$])
	{
		joined~= separator;
		joined~= piece;
	}
	return joined;
}
725 
unittest
{
    //Searching
    assert(indexOf("hello, hello", "hello") == 0);
    assert(lastIndexOf("hello, hello", "hello") == 7);
    assert(between(`string containing a "thing"`, `"`, `"`) == "thing");

    //Splitting and joining
    assert(split("hello world", " ").length == 2);
    assert(join(["hello", "world"], ", ") == "hello, world");

    //Transformations
    assert(replaceAll("\nTest\n", '\n') == "Test");
    assert(trim(" \n  \thello there  \n \t") == "hello there");

    //Conversions
    assert(toDefault!int("hello") == 0);

    //Numeric endings
    assert("test123".getNumericEnding == "123");
    assert("123abc".getNumericEnding == "");
    assert("123".getNumericEnding == "123");
}